########################################################################################################
##                                                                                                    ##
##  Functions required for replicating analyses in Tsai and Lin. 2017.                                ##
##  "Modeling Guessing Components in the Measurement of Political Knowledge."                         ##
##                                                                                                    ##
########################################################################################################

#####################################
## 
## CALCULATE A HIGHEST POSTERIOR DENSITY (HPD) INTERVAL
##
#####################################
HPDint <- function(obj, prob = 0.95, ...)
{
    obj <- as.matrix(obj)
    vals <- apply(obj, 2, sort)
    if (!is.matrix(vals)) stop("The object must have n.samp > 1")
    n.samp <- nrow(vals)
    npar <- ncol(vals)
    gap <- max(1, min(n.samp - 1, round(n.samp * prob)))
    init <- 1:(n.samp - gap)
    inds <- apply(vals[init + gap, ,drop=FALSE] - vals[init, ,drop=FALSE], 2, which.min)
    ans <- round(cbind(vals[cbind(inds, 1:npar)], vals[cbind(inds + gap, 1:npar)]), 3)
    dimnames(ans) <- list(colnames(obj), c("lower", "upper"))
    
    output <- list(HPDinterval=ans, Probability=gap/n.samp)
    #print(output)
}


########################################################################################################
## PROBABILITY OF CORRECT RESPONSE BASED ON ABILITY
prob.ability <- function(theta, alpha=0, beta=1){

	# theta: latent trait
	# alpha: item difficulty parameter
	# beta: item discrimination parameter

	theta <- theta; alpha <- alpha; beta <- beta;

	z <- beta*(theta - alpha);	
	p.ability <- exp(z)/(1 + exp(z));
	return(p.ability);
}

########################################################################################################
## PROBABILITY OF CORRECT RESPONSE BY GUESSING
prob.guess <- function(theta, alpha=0, beta=1, b=1){

	# theta: latent trait
	# alpha: item difficulty parameter
	# beta: item discrimination parameter
	# b: ability weight parameter

	theta <- theta; alpha <- alpha; beta <- beta; b <- b;

	z <- b*(theta - alpha);
	guess.func <- exp(z)/(1 + 3*exp(z));  # GUESSING FUNCTION
	p.ability <- prob.ability(theta=theta, alpha=alpha, beta=beta);
	
	p.guess <- (1 - p.ability)*guess.func;
	return(p.guess);
}

########################################################################################################
## PROBABILITY OF CORRECT RESPONSE BY PROPOSED IRT GUESSING MODEL
irt.guess1 <- function(theta, alpha=0, beta=1, b=1){

	# theta: latent trait
	# alpha: item difficulty parameter
	# beta: item discrimination parameter
	# b: ability weight parameter

	theta <- theta; alpha <- alpha; beta <- beta; b <- b;
	
	p.ability <- prob.ability(theta=theta, alpha=alpha, beta=beta);
	p.guess <- prob.guess(theta=theta, alpha=alpha, beta=beta, b=b);
	prob.correct <- p.ability + p.guess;
	
	return(cbind("prob.correct"=prob.correct, "prob.guess"=p.guess, "prob.ability"=p.ability) );
}

########################################################################################################
## PROBABILITY OF CORRECT RESPONSE BY CONVENTIONAL 3PL IRT MODEL
irt.3pl <- function(theta, alpha=0, beta=1, gamma=0.25){

	# theta: latent trait
	# alpha: item difficulty parameter
	# beta: item discrimination parameter
	# gamma: asymptotic probability

	theta <- theta;	alpha <- alpha;	beta <- beta; gamma <- gamma;
	
	p.ability <- prob.ability(theta=theta, alpha=alpha, beta=beta);
	prob.correct <- gamma + (1-gamma)*p.ability;
	prob.guess <- (1 - p.ability)*gamma
	
	return(cbind("prob.correct"=prob.correct, "prob.guess"=prob.guess, "prob.ability"=p.ability) );
}


########################################################################################################
## PROPORTION OF CORRECT RESPONSE GIVEN ITEMS AND ESTIMATED LATENT TRAITS
guessed.item <- function(response, theta, n.group=5){
	
	# response: a matrix of responses to items
	# theta:    estimates of latent traits
	# n.group:  the number of groups into which respondents are classified
	
	response <- response; theta <- theta; n.group <- n.group; 
	
	n.items <- dim(response)[2];
	grouping <- quantile(theta, probs=seq(0, 1, 1/n.group));
	grouping[length(grouping)] <- grouping[length(grouping)]+0.1
	
	
	mu.theta <- NA;
	prop.correct <- matrix(NA, nrow=n.group, ncol=n.items);
	
	for (i in 1:n.group) {
		g <- which(theta >= grouping[i] & theta < grouping[i+1]);
		mu.theta[i] <- mean(theta[g]);
			
		for (j in 1:n.items) {
			prop.correct[i,j] <- sum(response[g,j], na.rm=TRUE)/length(g);
		}	
	}
	
	output <- list(mu.theta=mu.theta, prop.correct=prop.correct);
}


########################################################################################################
## PROPORTIONS OF CORRECT PREDICTIONS FOR THE 2PL MODEL
pred.rate <- function(response, theta, beta, alpha, model="2PL", c=0, b=0, M=0){
	
	# response: a matrix of responses to items
	# theta:    a vector of the estimates of latent traits
	# beta:     a vector of the estimate of item discrimination
	# alpha:    a vector of the estimate of item difficulty
	# model:    c("2PL", "3PL", "2PL-G")
	# c:        a vector of the estimate of guessing parameter for 3PL
	# b:        a vector of the estimate of weight parameter for 2PL-G
	# M:        the number of choice options
	
	response <- response; theta <- as.numeric(theta); 
	beta <- as.numeric(beta); alpha <- as.numeric(alpha);
	c <- as.numeric(c); b <- as.numeric(b); M <- M;
	
	N <- dim(response)[1];
	K <- dim(response)[2];
	
	temp.t <- matrix(rep(x=theta, times=K), nrow=N, ncol=K, byrow=FALSE);
	temp.a <- matrix(rep(x=alpha, times=N), nrow=N, ncol=K, byrow=TRUE);
	temp.b <- matrix(rep(x=beta, times=N), nrow=N, ncol=K, byrow=TRUE);
	temp.c <- matrix(rep(x=c, times=N), nrow=N, ncol=K, byrow=TRUE);
	temp.g <- matrix(rep(x=b, times=N), nrow=N, ncol=K, byrow=TRUE);
	
	temp <- temp.b*(temp.t - temp.a);
	logis.cdf <- apply(X=temp, MARGIN=2, FUN=plogis);
	
	if (model=="2PL") {
		pred.p <- logis.cdf;
	}
	else {
		if (model=="3PL") {
			pred.p <- temp.c + (1 - temp.c)*logis.cdf;
		}
		else {
			if (model=="2PL-G") {
				exp.g <- exp(temp.g*(temp.t - temp.a));
				guess <- exp.g/(1+(M-1)*exp.g);
				pred.p <- logis.cdf + (1 - logis.cdf)*guess;
			}
		}
	}	

	pred.response <- pred.p;
	for (i in 1:K) {
		pred.response[which(pred.p[,i] >= 0.5),i] <- 1;
		pred.response[which(pred.p[,i] < 0.5),i] <- 0;
	}

	cond.correct <- c(rep(NA,4));
	for (k in 1:K) {
		index <- which(pred.response[,k] == 1);
		s <- sum(pred.response[index,k] == response[index,k], na.rm=TRUE);
		n <- length(index);
		cond.correct[k] <- s/n;
	}

	n.match <- rowSums(pred.response == response, na.rm=TRUE);
	n.tot <- K - rowSums(x=is.na(response));
	correct.pred <- n.match/n.tot
	
	item.match <- colSums(pred.response == response, na.rm=TRUE);
	n.answer <- N - colSums(x=is.na(response));
	
	output <- list(correct.pred = correct.pred, n.match = n.match, n.tot = n.tot, item.match = item.match, n.answer = n.answer, cond.correct = cond.correct);
}



########################################################################################################
crosstab <- function (..., dec.places = NULL,
                           type = NULL,
                           style = "wide",
                           row.vars = NULL,
                           col.vars = NULL,
                           percentages = TRUE, 
                           addmargins = TRUE,
                           subtotals=TRUE)

###################################################################################
#                                                                                 #
# Function created by Dr Paul Williamson, Dept. of Geography and Planning,        #
# School of Environmental Sciences, University of Liverpool, UK.                  #
#                                                                                 #
# Adapted from the function ctab() in the catspec packge.                         #
#                                                                                 #
# Version: 12th July 2013                                                         #
#                                                                                 #
# Output best viewed using the companion function print.crosstab()                #
#                                                                                 #
###################################################################################

  
  #Declare function used to convert frequency counts into relevant type of proportion or percentage
  {
    mk.pcnt.tbl <- function(tbl, type) {
        a <- length(row.vars)
        b <- length(col.vars)
        mrgn <- switch(type, column.pct = c(row.vars[-a], col.vars), 
                            row.pct = c(row.vars, col.vars[-b]),
                            joint.pct = c(row.vars[-a], col.vars[-b]),
                            total.pct = NULL)
        tbl <- prop.table(tbl, mrgn)
        if (percentages) {
            tbl <- tbl * 100
        }
        tbl
    }
        
    #Find no. of vars (all; row; col) for use in subsequent code
    n.row.vars <- length(row.vars)
    n.col.vars <- length(col.vars)
    n.vars <- n.row.vars + n.col.vars
    

    #Check to make sure all user-supplied arguments have valid values
    stopifnot(as.integer(dec.places) == dec.places, dec.places > -1)
    #type: see next section of code
    stopifnot(is.character(style))    
    stopifnot(is.logical(percentages))
    stopifnot(is.logical(addmargins))
    stopifnot(is.logical(subtotals))
    stopifnot(n.vars>=1)

    #Convert supplied table type(s) into full text string (e.g. "f" becomes "frequency")
    #If invalid type supplied, failed match gives user automatic error message
    types <- NULL
    choices <- c("frequency", "row.pct", "column.pct", "joint.pct", "total.pct")
    for (tp in type) types <- c(types, match.arg(tp, choices))
    type <- types
  
    #If no type supplied, default to 'frequency + total' for univariate tables and to
    #'frequency' for multi-dimenstional tables

    #For univariate table....
    if (n.vars == 1) {
      if (is.null(type)) {
        # default = freq count + total.pct  
        type <- c("frequency", "total.pct")
        #row.vars <- 1
      } else {
        #and any requests for row / col / joint.pct must be changed into requests for 'total.pct'
        type <- ifelse(type == "frequency", "frequency", "total.pct")
      }
    #For multivariate tables...
    } else if (is.null(type)) {
      # default = frequency count  
      type <- "frequency"
    }
    
      
    
    #Check for integrity of requested analysis and adjust values of function arguments as required
    
    if ((addmargins==FALSE) & (subtotals==FALSE)) {
      warning("WARNING: Request to suppress subtotals (subtotals=FALSE) ignored because no margins requested (addmargins=FALSE)")
      subtotals <- TRUE
    }
          
    if ((n.vars>1) & (length(type)>1) & (addmargins==TRUE)) {
      warning("WARNING: Only row totals added when more than one table type requested")
      #Code lower down selecting type of margin implements this...
    }

    if ((length(type)>1) & (subtotals==FALSE)) { 
      warning("WARNING: Can only request supply one table type if requesting suppression of subtotals; suppression of subtotals not executed")
      subtotals <- TRUE
    }
    
    if ((length(type)==1) & (subtotals==FALSE)) {
      choices <- c("frequency", "row.pct", "column.pct", "joint.pct", "total.pct")
      tp <- match.arg(type, choices)
      if (tp %in% c("row.pct","column.pct","joint.pct")) {
          warning("WARNING: subtotals can only be suppressed for tables of type 'frequency' or 'total.pct'")
          subtotals<- TRUE
         }
    }
    
    if ((n.vars > 2) & (n.col.vars>1) & (subtotals==FALSE)) 
      warning("WARNING: suppression of subtotals assumes only 1 col var; table flattened accordingly")
    
    
    if ( (subtotals==FALSE) & (n.vars>2) )  {
    #If subtotals not required AND total table vars > 2
    #Reassign all but last col.var as row vars
    #[because, for simplicity, crosstabs assumes removal of subtotals uses tables with only ONE col var]
    #N.B. Subtotals only present in tables with > 2 cross-classified vars...
      if (length(col.vars)>1) {
        row.vars <- c(row.vars,col.vars[-length(col.vars)])
        col.vars <- col.vars[length(col.vars)]
        n.row.vars <- length(row.vars)
        n.col.vars <- 1
      }
    }
    
    #If dec.places not set by user, set to 2 unlesss only one table of type frequency requested,
    #in which case set to 0.  [Leaves user with possibility of having frequency tables with > 0 dp]
    if (is.null(dec.places)) {
      if ((length(type)==1) & (type[1]=="frequency")) {
        dec.places <- 0
      } else {
        dec.places <-2
      }
    }
 
    #Take the original input data, whatever form originally supplied in,
    #convert into table format using requested row and col vars, and save as 'tbl'

    args <- list(...)    
    
    if (length(args) > 1) {
        if (!all(sapply(args, is.factor))) 
            stop("If more than one argument is passed then all must be factors")
        tbl <- table(...)
    }
    else {
        if (is.factor(...)) {
            tbl <- table(...)
        }
        else if (is.table(...)) {
            tbl <- eval(...)
        }
        else if (is.data.frame(...)) {
            #tbl <- table(...)
            if (is.null(row.vars) && is.null(col.vars)) {
              tbl <- table(...)
            }
            else {
              var.names <- c(row.vars,col.vars)
              A <- (...)
              tbl <- table(A[var.names])
              if(length(var.names==1)) names(dimnames(tbl)) <- var.names
              #[table() only autocompletes dimnames for multivariate crosstabs of dataframes]
            }
        }
        else if (class(...) == "ftable") {
            tbl <- eval(...)
            if (is.null(row.vars) && is.null(col.vars)) {
                row.vars <- names(attr(tbl, "row.vars"))
                col.vars <- names(attr(tbl, "col.vars"))
            }
            tbl <- as.table(tbl)
        }
        else if (class(...) == "ctab") {
            tbl <- eval(...)
            if (is.null(row.vars) && is.null(col.vars)) {
                row.vars <- tbl$row.vars
                col.vars <- tbl$col.vars
            }
            for (opt in c("dec.places", "type", "style", "percentages", 
                "addmargins", "subtotals")) if (is.null(get(opt))) 
                assign(opt, eval(parse(text = paste("tbl$", opt, 
                  sep = ""))))
            tbl <- tbl$table
        }
        else {
            stop("first argument must be either factors or a table object")
        }
    }
    
    #Convert supplied table style into full text string (e.g. "l" becomes "long")
    style <- match.arg(style, c("long", "wide"))

    #Extract row and col names to be used in creating 'tbl' from supplied input data
    nms <- names(dimnames(tbl))
    z <- length(nms)
    if (!is.null(row.vars) && !is.numeric(row.vars)) {
        row.vars <- order(match(nms, row.vars), na.last = NA)
    }
    if (!is.null(col.vars) && !is.numeric(col.vars)) {
        col.vars <- order(match(nms, col.vars), na.last = NA)
    }
    if (!is.null(row.vars) && is.null(col.vars)) {
        col.vars <- (1:z)[-row.vars]
    }
    if (!is.null(col.vars) && is.null(row.vars)) {
        row.vars <- (1:z)[-col.vars]
    }
    if (is.null(row.vars) && is.null(col.vars)) {
        col.vars <- z
        row.vars <- (1:z)[-col.vars]
    }
    
    #Take the original input data, converted into table format using supplied row and col vars (tbl)
    #and create a second version (crosstab) which stores results as percentages if a percentage table type is requested.
    if (type[1] == "frequency") 
      crosstab <- tbl
    else 
      crosstab <- mk.pcnt.tbl(tbl, type[1])

    
    #If multiple table types requested, create and add these to 
    if (length(type) > 1) {
      tbldat <- as.data.frame.table(crosstab)
      z <- length(names(tbldat)) + 1
      tbldat[z] <- 1
      pcntlab <- type
      pcntlab[match("frequency", type)] <- "Count"
      pcntlab[match("row.pct", type)] <- "Row %"
      pcntlab[match("column.pct", type)] <- "Column %"
      pcntlab[match("joint.pct", type)] <- "Joint %"
      pcntlab[match("total.pct", type)] <- "Total %"
      for (i in 2:length(type)) {
          if (type[i] == "frequency") 
              crosstab <- tbl
          else crosstab <- mk.pcnt.tbl(tbl, type[i])
          crosstab <- as.data.frame.table(crosstab)
          crosstab[z] <- i
          tbldat <- rbind(tbldat, crosstab)
      }
      tbldat[[z]] <- as.factor(tbldat[[z]])
      levels(tbldat[[z]]) <- pcntlab
      crosstab <- xtabs(Freq ~ ., data = tbldat)
      names(dimnames(crosstab))[z - 1] <- ""
    }
    
        
    #Add margins if required, adding only those margins appropriate to user request
    if (addmargins==TRUE) {

      vars <- c(row.vars,col.vars)
      
      if (length(type)==1) {
      if (type=="row.pct") 
        { crosstab <- addmargins(crosstab,margin=c(vars[n.vars]))
          tbl <- addmargins(tbl,margin=c(vars[n.vars]))
        }
      else 
        { if (type=="column.pct") 
            { crosstab <- addmargins(crosstab,margin=c(vars[n.row.vars]))
              tbl <- addmargins(tbl,margin=c(vars[n.row.vars]))
            }
          else 
            { if (type=="joint.pct") 
              { crosstab <- addmargins(crosstab,margin=c(vars[(n.row.vars)],vars[n.vars])) 
                tbl <- addmargins(tbl,margin=c(vars[(n.row.vars)],vars[n.vars])) 
              }
              else #must be total.pct OR frequency
                { crosstab <- addmargins(crosstab)
                  tbl <- addmargins(tbl)
                }
            }
  	     } 
      }
      
      #If more than one table type requested, only adding row totals makes any sense...
      if (length(type)>1) {
        crosstab <- addmargins(crosstab,margin=c(vars[n.vars]))
        tbl <- addmargins(tbl,margin=c(vars[n.vars]))
      }
      
    }  
    
    
    #If subtotals not required, and total vars > 2, create dataframe version of table, with relevent
    #subtotal rows / cols dropped [Subtotals only present in tables with > 2 cross-classified vars]
    t1 <- NULL
    if ( (subtotals==FALSE) & (n.vars>2) )  {
    
    #Create version of crosstab in ftable format
    t1 <- crosstab 
    t1 <- ftable(t1,row.vars=row.vars,col.vars=col.vars)

    #Convert to a dataframe
    t1 <- as.data.frame(format(t1),stringsAsFactors=FALSE)

    #Remove backslashes from category names AND colnames
    t1 <- apply(t1[,],2, function(x) gsub("\"","",x))
    #Remove preceding and trailing spaces from category names to enable accurate capture of 'sum' rows/cols
    #[Use of grep might extrac category labels with 'sum' as part of a longer one or two word string...]
    t1 <- apply(t1,2,function(x) gsub("[[:space:]]*$","",gsub("^[[:space:]]*","",x)))

    #Reshape dataframe to that variable and category labels display as required
    #(a) Move col category names down one row; and move col variable name one column to right
    t1[2,(n.row.vars+1):ncol(t1)] <- t1[1,(n.row.vars+1):ncol(t1)]
    t1[1,] <- ""
    t1[1,(n.row.vars+2)] <- t1[2,(n.row.vars+1)]    
    #(b) Drop the now redundant column separating the row.var labels from the table data + col.var labels
    t1 <- t1[,-(n.row.vars+1)]
    
    #In 'lab', assign category labels for each variable to all rows (to allow identification of sub-totals) 
    lab <- t1[,1:n.row.vars]
    for (c in 1:n.row.vars) {
      for (r in 2:nrow(lab)) {
        if (lab[r,c]=="") lab[r,c] <- lab[r-1,c]  
      }
    }

    lab <- (apply(lab[,1:n.row.vars],2,function(x) x=="Sum"))
    lab <- apply(lab,1,sum)
    #Filter out rows of dataframe containing subtotals
    
    t1 <- t1[((lab==0) | (lab==n.row.vars)),]

    #Move the 'Sum' label associated with last row to the first column; in the process
    #setting the final row labels associated with other row variables to ""
    t1[nrow(t1),1] <- "Sum"
    t1[nrow(t1),(2:n.row.vars)] <- ""
    
    #set row and column names to NULL
    rownames(t1) <- NULL
    colnames(t1) <- NULL

    }
    
    
    
    #Create output object 'result' [class: crosstab]
    result <- NULL
    #(a) record of argument values used to produce tabular output
    result$row.vars <- row.vars
    result$col.vars <- col.vars
    result$dec.places <- dec.places
    result$type <- type
    result$style <- style
    result$percentages <- percentages
    result$addmargins <- addmargins
    result$subtotals <- subtotals
    
    #(b) tabular output [3 variants]
    result$table <- tbl  #Stores original cross-tab frequency counts without margins [class: table]
    result$crosstab <- crosstab #Stores cross-tab in table format using requested style(frequency/pct) and table margins (on/off)
                                #[class: table]  
    result$crosstab.nosub <- t1  #crosstab with subtotals suppressed [class: dataframe; or NULL if no subtotals suppressed]  
    class(result) <- "crosstab"    

    #Return 'result' as output of function
    result
    
}